!pip install --upgrade plotly
!pip install sweetviz
!pip install dtale
!pip install numpy
!pip install pandas
!pip install pandas_profiling
import operator
import pandas as pd
import numpy as np
from datetime import datetime, date
from pandas_profiling import ProfileReport
import warnings
warnings.filterwarnings('ignore')
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
%matplotlib inline
df = pd.read_csv("cars_dataset.csv")
df.drop(['Unnamed: 0'],axis=1,inplace=True)
df.columns
Index(['Make', 'Model', 'Price', 'Year', 'Transmission', 'Listing Type',
'Body Type', 'Fuel Type', 'Engine Capacity - Cc', 'Engine Power - KW'],
dtype='object')
df['Price'] = df['Price'].fillna(df['Price'].mean()).astype(int)
df['Year'] = df['Year'].fillna(df['Year'].mean()).astype(int)
df['Engine Capacity - Cc'] = df['Engine Capacity - Cc'].fillna(df['Year'].mean()).astype(int)
df['Price'] = df['Price'].astype(int)
df['Year'] = df['Year'].astype(int)
df['Engine Capacity - Cc'] = df['Engine Capacity - Cc'].astype(int)
df['Engine Power - KW'] = df['Engine Power - KW'].astype('str')
df['Make'] = df['Make'].astype('str')
df['Model'] = df['Model'].astype('str')
df['Transmission'] = df['Transmission'].astype('str')
df['Listing Type'] = df['Listing Type'].astype('str')
df['Body Type'] = df['Body Type'].astype('str')
df['Fuel Type'] = df['Fuel Type'].astype('str')
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 1099 entries, 0 to 1098 Data columns (total 10 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Make 1099 non-null object 1 Model 1099 non-null object 2 Price 1099 non-null int32 3 Year 1099 non-null int32 4 Transmission 1099 non-null object 5 Listing Type 1099 non-null object 6 Body Type 1099 non-null object 7 Fuel Type 1099 non-null object 8 Engine Capacity - Cc 1099 non-null int32 9 Engine Power - KW 1099 non-null object dtypes: int32(3), object(7) memory usage: 73.1+ KB
df.isnull().sum()
Make 0 Model 0 Price 0 Year 0 Transmission 0 Listing Type 0 Body Type 0 Fuel Type 0 Engine Capacity - Cc 0 Engine Power - KW 0 dtype: int64
print("Total Unique Make : ",len(df['Make'].unique()))
Total Unique Make : 96
print("Total Unique Model : ",len(df['Model'].unique()))
Total Unique Model : 711
print("Total Unique Year : ",len(df['Year'].unique()))
Total Unique Year : 87
print("Total Unique Transmission : ",len(df['Transmission'].unique()))
Total Unique Transmission : 10
print("Total Unique Listing Type : ",len(df['Listing Type'].unique()))
Total Unique Listing Type : 3
print("Total Unique Body Type : ",len(df['Body Type'].unique()))
Total Unique Body Type : 19
print("Total Unique Fuel Type : ",len(df['Fuel Type'].unique()))
Total Unique Fuel Type : 9
df.describe()
| Price | Year | Engine Capacity - Cc | |
|---|---|---|---|
| count | 1.099000e+03 | 1099.000000 | 1099.000000 |
| mean | 1.600981e+06 | 1989.533212 | 2450.032757 |
| std | 8.903456e+07 | 22.721557 | 1384.976653 |
| min | -2.147484e+09 | 1920.000000 | 0.000000 |
| 25% | 2.799500e+04 | 1970.000000 | 1989.000000 |
| 50% | 4.999000e+04 | 1989.000000 | 1989.000000 |
| 75% | 1.189875e+05 | 2011.000000 | 2800.000000 |
| max | 2.000020e+09 | 2021.000000 | 7800.000000 |
df.dtypes
Make object Model object Price int32 Year int32 Transmission object Listing Type object Body Type object Fuel Type object Engine Capacity - Cc int32 Engine Power - KW object dtype: object
df
| Make | Model | Price | Year | Transmission | Listing Type | Body Type | Fuel Type | Engine Capacity - Cc | Engine Power - KW | |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Porsche | 911 992 Carrera S Coupe 2dr PDK 8sp 3.0TT [MY21] | 7922666 | 2020 | nan | nan | nan | nan | 1989 | nan |
| 1 | BMW | i8 I12 LCI Coupe 2dr Auto 6sp AWD 1.5T/105kW ... | 7922666 | 2019 | nan | nan | nan | nan | 1989 | nan |
| 2 | Land Rover | Defender 90 Adventure Wagon 3dr Man 6sp 4x4 2... | 10999000 | 2015 | nan | nan | nan | nan | 1989 | nan |
| 3 | Porsche | 911 997 Series II Carrera Cabriolet 2dr PDK 7... | 7922666 | 2009 | nan | nan | nan | nan | 1989 | nan |
| 4 | Porsche | Cayenne 9YB Turbo Coupe 5dr Tiptronic 8sp 4x4... | 7922666 | 2020 | nan | nan | nan | nan | 1989 | nan |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 1094 | CHEVROLET | BELAIR | 7922666 | 1957 | Automatic | Used | Hardtop - Coupe | nan | 1989 | nan |
| 1095 | PLYMOUTH | ROAD RUNNER TRIBUTE | 7922666 | 1968 | Automatic | Used | Hardtop - Coupe | nan | 1989 | nan |
| 1096 | AC | COBRA | 7922666 | 2001 | Manual | Used | Roadster | nan | 1989 | nan |
| 1097 | HSV | W427 VE | 7922666 | 2009 | nan | Used | Sedan | nan | 1989 | nan |
| 1098 | CHEVROLET | CORVETTE C6 | 7922666 | 2008 | Manual | Used | Hardtop - Coupe | nan | 1989 | 377 |
1099 rows × 10 columns
profile = ProfileReport(df, title="Cars Profiling Report", explorative=True)
profile
profile.to_widgets()
profile.to_file("Cars Profiling Report.html")
import sweetviz as sv
cars_report = sv.analyze(df)
cars_report
cars_report.show_html('Automated_EDA_Cars_Dataset.html')
import dtale
d = dtale.show(df, ignore_duplicate=True)
d
d.adjust_cell_dimensions(height=100)
d.open_browser()
df['Make'].value_counts()
FORD 180
MERCEDES-BENZ 118
HOLDEN 103
CHEVROLET 76
BMW 51
...
Bentley 1
HARLEY DAVIDSON 1
Tesla 1
FLINT 1
COBRA 1
Name: Make, Length: 96, dtype: int64
df['Model'].value_counts()
MUSTANG 17
CORVETTE 14
RANGE ROVER 7
CAYENNE 7
ROADSTER 6
..
E-TYPE LIGHTWEIGHT 1
DB2/4 Mk 1 1
COMMODORE COMMODORE VF MOTORSPORT EDITION 1
20/25 HP OA 1
Range Rover L405 SDV8 Vogue SE Wagon 5dr Spts Auto 8sp 4x4 4.4DTT [MY19] 1
Name: Model, Length: 711, dtype: int64
df['Year'].value_counts()
1989 69
2015 46
2010 40
2013 39
2014 35
..
1940 1
1937 1
1953 1
1990 1
1920 1
Name: Year, Length: 87, dtype: int64
df['Body Type'].value_counts()
Sedan 289 Hardtop - Coupe 250 Wagon 115 Convertible 112 Ute 86 nan 54 Roadster 51 Hatchback 33 Van 32 Fastback 18 Panel Van 12 Cab Chassis 8 Soft Top 7 Targa 7 Limousine 7 Open Wheeler 6 Tourer 5 Tray 4 Style Side 3 Name: Body Type, dtype: int64
df['Fuel Type'].value_counts()
nan 415 Unleaded 253 Premium Unleaded 244 Diesel 106 Leaded Petrol 66 Dual Fuel 6 LPG 6 Ethanol 2 Hybrid 1 Name: Fuel Type, dtype: int64
df.columns
Index(['Make', 'Model', 'Price', 'Year', 'Transmission', 'Listing Type',
'Body Type', 'Fuel Type', 'Engine Capacity - Cc', 'Engine Power - KW'],
dtype='object')
df['Transmission'].value_counts()
Automatic 484 nan 312 Manual 262 Tiptronic 18 Manual Overdrive 7 Sequential Manual Auto 5 Sportmatic 3 Steptronic 3 CVT 3 Sequential Sports Shift 2 Name: Transmission, dtype: int64
df['Listing Type'].value_counts()
Used 1041 nan 54 New 4 Name: Listing Type, dtype: int64
df['Engine Capacity - Cc'].value_counts()
1989 585
0 25
2000 21
350 16
1600 15
...
307 1
4400 1
4806 1
5733 1
7000 1
Name: Engine Capacity - Cc, Length: 171, dtype: int64
df['Engine Power - KW'].value_counts()
nan 944
0 13
200 5
350 4
450 3
...
5766.0 1
243 1
114 1
44 1
283 1
Name: Engine Power - KW, Length: 89, dtype: int64
corr = df.corr()
fig = px.imshow(corr)
fig.show()
a = dict(df['Make'].value_counts())
insight1 = pd.DataFrame()
a = dict(sorted(a.items(), key=operator.itemgetter(1), reverse=True)[:20])
insight1['Make'] = a.keys()
insight1['Total Count'] = a.values()
insight1['Percentage'] = ((insight1['Total Count'] / 1099)*100).round(2)
insight1['Percentage'] = insight1['Percentage'].astype(str) + '%'
insight1.style.background_gradient(cmap='Reds')
| Make | Total Count | Percentage | |
|---|---|---|---|
| 0 | FORD | 180 | 16.38% |
| 1 | MERCEDES-BENZ | 118 | 10.74% |
| 2 | HOLDEN | 103 | 9.37% |
| 3 | CHEVROLET | 76 | 6.92% |
| 4 | BMW | 51 | 4.64% |
| 5 | JAGUAR | 41 | 3.73% |
| 6 | HSV | 30 | 2.73% |
| 7 | VOLKSWAGEN | 30 | 2.73% |
| 8 | PORSCHE | 30 | 2.73% |
| 9 | AUDI | 27 | 2.46% |
| 10 | TOYOTA | 27 | 2.46% |
| 11 | CHRYSLER | 26 | 2.37% |
| 12 | PONTIAC | 16 | 1.46% |
| 13 | LAND ROVER | 16 | 1.46% |
| 14 | DODGE | 14 | 1.27% |
| 15 | FERRARI | 14 | 1.27% |
| 16 | MAZDA | 14 | 1.27% |
| 17 | NISSAN | 13 | 1.18% |
| 18 | Ferrari | 12 | 1.09% |
| 19 | Porsche | 11 | 1.0% |
fig = px.treemap(insight1, path=['Make', 'Total Count', 'Percentage'], values='Total Count',title="Top 20 Car Makes")
fig.show()
b = dict(df['Model'].value_counts())
insight2 = pd.DataFrame()
b = dict(sorted(b.items(), key=operator.itemgetter(1), reverse=True)[:10])
insight2['Model'] = b.keys()
insight2['Total Count'] = b.values()
insight2['Percentage'] = ((insight2['Total Count'] / 1099)*100).round(2)
insight2['Percentage'] = insight2['Percentage'].astype(str) + '%'
insight2.style.background_gradient(cmap='Reds')
| Model | Total Count | Percentage | |
|---|---|---|---|
| 0 | MUSTANG | 17 | 1.55% |
| 1 | CORVETTE | 14 | 1.27% |
| 2 | RANGE ROVER | 7 | 0.64% |
| 3 | CAYENNE | 7 | 0.64% |
| 4 | ROADSTER | 6 | 0.55% |
| 5 | CONTINENTAL | 6 | 0.55% |
| 6 | BEL AIR | 6 | 0.55% |
| 7 | X5 | 6 | 0.55% |
| 8 | A8 | 5 | 0.45% |
| 9 | COUPE | 5 | 0.45% |
fig = px.funnel(insight2,x="Model",y="Total Count",color="Model",text="Percentage",title = "Top 10 Car Models")
fig.show()
c = dict(df['Year'].value_counts())
insight3 = pd.DataFrame()
c = dict(sorted(c.items(), key=operator.itemgetter(1), reverse=True)[:20])
insight3['Year'] = c.keys()
insight3['Total Count'] = c.values()
insight3['Percentage'] = ((insight3['Total Count'] / 1099)*100).round(2)
insight3['Percentage'] = insight3['Percentage'].astype(str) + "%"
insight3.style.background_gradient(cmap='Reds')
| Year | Total Count | Percentage | |
|---|---|---|---|
| 0 | 1989 | 69 | 6.28% |
| 1 | 2015 | 46 | 4.19% |
| 2 | 2010 | 40 | 3.64% |
| 3 | 2013 | 39 | 3.55% |
| 4 | 2014 | 35 | 3.18% |
| 5 | 2016 | 35 | 3.18% |
| 6 | 1970 | 33 | 3.0% |
| 7 | 2012 | 31 | 2.82% |
| 8 | 2017 | 29 | 2.64% |
| 9 | 1968 | 28 | 2.55% |
| 10 | 2008 | 26 | 2.37% |
| 11 | 1971 | 25 | 2.27% |
| 12 | 2011 | 25 | 2.27% |
| 13 | 2007 | 25 | 2.27% |
| 14 | 2009 | 22 | 2.0% |
| 15 | 1967 | 22 | 2.0% |
| 16 | 1975 | 20 | 1.82% |
| 17 | 1969 | 20 | 1.82% |
| 18 | 1965 | 19 | 1.73% |
| 19 | 1974 | 19 | 1.73% |
fig = px.sunburst(insight3, path=['Year', 'Total Count', 'Percentage'], values='Total Count',title="Top 20 Car Make Years")
fig.update_layout(margin = dict(t=0, l=0, r=0, b=0))
fig.show()
d = dict(df['Listing Type'].value_counts())
insight4 = pd.DataFrame()
d = dict(sorted(d.items(), key=operator.itemgetter(1), reverse=True)[:2])
insight4['Listing Type'] = d.keys()
insight4['Total Count'] = d.values()
insight4['Percentage'] = ((insight4['Total Count'] / 1099)*100).round(2)
insight4['Listing Type'] = insight4['Listing Type'].replace(r'nan', "Not available", regex=True)
insight4['Percentage'] = insight4['Percentage'].astype(str) + "%"
insight4.style.background_gradient(cmap='Reds')
| Listing Type | Total Count | Percentage | |
|---|---|---|---|
| 0 | Used | 1041 | 94.72% |
| 1 | Not available | 54 | 4.91% |
fig = px.bar(insight4, x='Listing Type',y='Total Count',text='Percentage',title="Car Listing Types",color='Listing Type')
fig.show()
e = dict(df['Body Type'].value_counts())
insight5 = pd.DataFrame()
e = dict(sorted(e.items(), key=operator.itemgetter(1), reverse=True)[:20])
insight5['Body Type'] = e.keys()
insight5['Total Count'] = e.values()
insight5['Percentage'] = ((insight5['Total Count'] / 1099)*100).round(2)
insight5['Percentage'] = insight5['Percentage'].astype(str) + "%"
insight5['Body Type'] = insight5['Body Type'].replace(r'nan', "Not available", regex=True)
insight5.style.background_gradient(cmap='Reds')
| Body Type | Total Count | Percentage | |
|---|---|---|---|
| 0 | Sedan | 289 | 26.3% |
| 1 | Hardtop - Coupe | 250 | 22.75% |
| 2 | Wagon | 115 | 10.46% |
| 3 | Convertible | 112 | 10.19% |
| 4 | Ute | 86 | 7.83% |
| 5 | Not available | 54 | 4.91% |
| 6 | Roadster | 51 | 4.64% |
| 7 | Hatchback | 33 | 3.0% |
| 8 | Van | 32 | 2.91% |
| 9 | Fastback | 18 | 1.64% |
| 10 | Panel Van | 12 | 1.09% |
| 11 | Cab Chassis | 8 | 0.73% |
| 12 | Soft Top | 7 | 0.64% |
| 13 | Targa | 7 | 0.64% |
| 14 | Limousine | 7 | 0.64% |
| 15 | Open Wheeler | 6 | 0.55% |
| 16 | Tourer | 5 | 0.45% |
| 17 | Tray | 4 | 0.36% |
| 18 | Style Side | 3 | 0.27% |
fig = px.bar(insight5, x="Body Type", y="Total Count",text = "Percentage",
title ="Car Body Types",color = "Body Type")
fig.show()
f = dict(df['Fuel Type'].value_counts())
insight6 = pd.DataFrame()
f = dict(sorted(f.items(), key=operator.itemgetter(1), reverse=True)[:10])
insight6['Fuel Type'] = f.keys()
insight6['Total Count'] = f.values()
insight6['Percentage'] = ((insight6['Total Count'] / 1099)*100).round(2)
insight6['Percentage'] = insight6['Percentage'].astype(str) + "%"
insight6['Fuel Type'] = insight6['Fuel Type'].replace(r'nan', "Not available", regex=True)
insight6.style.background_gradient(cmap='Reds')
| Fuel Type | Total Count | Percentage | |
|---|---|---|---|
| 0 | Not available | 415 | 37.76% |
| 1 | Unleaded | 253 | 23.02% |
| 2 | Premium Unleaded | 244 | 22.2% |
| 3 | Diesel | 106 | 9.65% |
| 4 | Leaded Petrol | 66 | 6.01% |
| 5 | Dual Fuel | 6 | 0.55% |
| 6 | LPG | 6 | 0.55% |
| 7 | Ethanol | 2 | 0.18% |
| 8 | Hybrid | 1 | 0.09% |
fig = px.bar(insight6, x="Fuel Type", y="Total Count",text = "Percentage",
title ="Car Fuel Type",color = "Fuel Type")
fig.show()
g = dict(df['Engine Capacity - Cc'].value_counts())
insight7 = pd.DataFrame()
g = dict(sorted(g.items(), key=operator.itemgetter(1), reverse=True)[:10])
insight7['Engine Capacity - Cc'] = g.keys()
insight7['Total Count'] = g.values()
insight7['Percentage'] = ((insight7['Total Count'] / 1099)*100).round(2)
insight7['Percentage'] = insight7['Percentage'].astype(str) + "%"
insight7['Engine Capacity - Cc'] = insight7['Engine Capacity - Cc'].round(2)
insight7.style.background_gradient(cmap='Reds')
| Engine Capacity - Cc | Total Count | Percentage | |
|---|---|---|---|
| 0 | 1989 | 585 | 53.23% |
| 1 | 0 | 25 | 2.27% |
| 2 | 2000 | 21 | 1.91% |
| 3 | 350 | 16 | 1.46% |
| 4 | 1600 | 15 | 1.36% |
| 5 | 5000 | 14 | 1.27% |
| 6 | 2993 | 12 | 1.09% |
| 7 | 3000 | 11 | 1.0% |
| 8 | 4200 | 10 | 0.91% |
| 9 | 3200 | 9 | 0.82% |
fig = px.funnel(insight7, x="Engine Capacity - Cc", y="Total Count",text = "Percentage",
title ="Top 10 Car Engine Capacity - Cc",color = "Total Count")
fig.show()
h = dict(df['Engine Power - KW'].value_counts())
insight8 = pd.DataFrame()
h = dict(sorted(h.items(), key=operator.itemgetter(1), reverse=True)[:10])
insight8['Engine Power - KW'] = h.keys()
insight8['Total Count'] = h.values()
insight8['Percentage'] = ((insight8['Total Count'] / 1099)*100).round(2)
insight8['Engine Power - KW'] = insight8['Engine Power - KW'].replace(r'nan', "Not available", regex=True)
insight8['Percentage'] = insight8['Percentage'].astype(str) + "%"
insight8.style.background_gradient(cmap='Reds')
| Engine Power - KW | Total Count | Percentage | |
|---|---|---|---|
| 0 | Not available | 944 | 85.9% |
| 1 | 0 | 13 | 1.18% |
| 2 | 200 | 5 | 0.45% |
| 3 | 350 | 4 | 0.36% |
| 4 | 450 | 3 | 0.27% |
| 5 | 290 | 3 | 0.27% |
| 6 | 45 | 3 | 0.27% |
| 7 | 190 | 3 | 0.27% |
| 8 | 149 | 3 | 0.27% |
| 9 | 330 | 2 | 0.18% |
fig = px.bar(insight8, x="Engine Power - KW", y="Total Count",text = "Percentage",
title ="Top 10 Car Engine Power - KW",color = "Total Count")
fig.show()
df['Price'] = df['Price'].mask(df.Price < 0, 0)
i = dict(df['Price'].value_counts())
insight9 = pd.DataFrame()
i = dict(sorted(i.items(), key=operator.itemgetter(1), reverse=True)[:10])
insight9['Price'] = i.keys()
insight9['Total Count'] = i.values()
insight9['Percentage'] = ((insight9['Total Count'] / 1099)*100).round(2)
insight9['Percentage'] = insight9['Percentage'].astype(str) + "%"
insight9.style.background_gradient(cmap='Reds')
| Price | Total Count | Percentage | |
|---|---|---|---|
| 0 | 7922666 | 74 | 6.73% |
| 1 | 39990 | 25 | 2.27% |
| 2 | 59990 | 22 | 2.0% |
| 3 | 49990 | 20 | 1.82% |
| 4 | 25000 | 17 | 1.55% |
| 5 | 19990 | 15 | 1.36% |
| 6 | 29990 | 14 | 1.27% |
| 7 | 34990 | 14 | 1.27% |
| 8 | 14990 | 14 | 1.27% |
| 9 | 45000 | 13 | 1.18% |
fig = px.sunburst(insight9, path = ['Price','Total Count','Percentage'],
title ="Top 10 Car Price",color = "Total Count")
fig.show()
df['COUNTER'] = 1
insight10 = df.groupby(['Make','Model'])['COUNTER'].agg(['count'])
insight10['Percentage'] = ((insight10['count'] / 1099)*100).round(2)
insight10['Percentage'] = insight10['Percentage'].astype(str) + '%'
insight10 = insight10.reset_index(['Make','Model'])
insight10= insight10.nlargest(7,'count')
insight10.reset_index(inplace=True,drop=True)
insight10.style.background_gradient(cmap='Reds')
| Make | Model | count | Percentage | |
|---|---|---|---|---|
| 0 | FORD | MUSTANG | 17 | 1.55% |
| 1 | CHEVROLET | CORVETTE | 14 | 1.27% |
| 2 | LAND ROVER | RANGE ROVER | 7 | 0.64% |
| 3 | PORSCHE | CAYENNE | 7 | 0.64% |
| 4 | BMW | X5 | 6 | 0.55% |
| 5 | CHEVROLET | BEL AIR | 6 | 0.55% |
| 6 | AUDI | A8 | 5 | 0.45% |
fig = px.bar(insight10,x="Make", y="count", color="Model",barmode='group', text="Percentage",
title = "Top 7 Car Makes according to Model")
fig.show()
df['COUNTER'] = 1
insight11 = df.groupby(['Make','Year'])['COUNTER'].agg(['count'])
insight11['Percentage'] = ((insight11['count'] / 1099)*100).round(2)
insight11['Percentage'] = insight11['Percentage'].astype(str) + '%'
insight11 = insight11.reset_index(['Make','Year'])
insight11 = insight11.nlargest(10,'count')
insight11.reset_index(inplace=True,drop=True)
insight11.style.background_gradient(cmap='Reds')
| Make | Year | count | Percentage | |
|---|---|---|---|---|
| 0 | MERCEDES-BENZ | 2015 | 14 | 1.27% |
| 1 | FORD | 1971 | 13 | 1.18% |
| 2 | FORD | 1970 | 12 | 1.09% |
| 3 | MERCEDES-BENZ | 2013 | 12 | 1.09% |
| 4 | FORD | 1989 | 11 | 1.0% |
| 5 | MERCEDES-BENZ | 2010 | 10 | 0.91% |
| 6 | CHEVROLET | 1968 | 9 | 0.82% |
| 7 | FORD | 1968 | 9 | 0.82% |
| 8 | FORD | 1973 | 9 | 0.82% |
| 9 | HOLDEN | 1975 | 9 | 0.82% |
fig = px.bar(insight11,x="Make", y="count", color="Year",barmode='group',text = "Percentage",
title = "Top 10 Car Makes according to Year")
fig.show()
df['COUNTER'] = 1
insight12 = df.groupby(['Make','Transmission'])['COUNTER'].agg(['count'])
insight12['Percentage'] = ((insight12['count'] / 1099)*100).round(2)
insight12['Percentage'] = insight12['Percentage'].astype(str) + '%'
insight12 = insight12.reset_index(['Make','Transmission'])
insight12['Transmission'] = insight12['Transmission'].replace(r'nan', "Not available", regex=True)
insight12 = insight12.nlargest(10,'count')
insight12.reset_index(inplace=True,drop=True)
insight12.style.background_gradient(cmap='Reds')
| Make | Transmission | count | Percentage | |
|---|---|---|---|---|
| 0 | FORD | Automatic | 92 | 8.37% |
| 1 | MERCEDES-BENZ | Automatic | 80 | 7.28% |
| 2 | HOLDEN | Automatic | 49 | 4.46% |
| 3 | FORD | Manual | 46 | 4.19% |
| 4 | FORD | Not available | 41 | 3.73% |
| 5 | CHEVROLET | Automatic | 37 | 3.37% |
| 6 | MERCEDES-BENZ | Not available | 35 | 3.18% |
| 7 | HOLDEN | Manual | 29 | 2.64% |
| 8 | CHEVROLET | Manual | 26 | 2.37% |
| 9 | BMW | Not available | 24 | 2.18% |
fig = px.bar(insight12,x="Make", y="count", color="Transmission",barmode='group',text = "Percentage",
title = "Top 10 Car Makes according to Transmission")
fig.show()
df['COUNTER'] = 1
insight13 = df.groupby(['Make','Listing Type'])['COUNTER'].agg(['count'])
insight13['Percentage'] = ((insight13['count'] / 1099)*100).round(2)
insight13['Percentage'] = insight13['Percentage'].astype(str) + '%'
insight13 = insight13.reset_index(['Make','Listing Type'])
insight13 = insight13.nlargest(10,'count')
insight13.reset_index(inplace=True,drop=True)
insight13.style.background_gradient(cmap='Reds')
| Make | Listing Type | count | Percentage | |
|---|---|---|---|---|
| 0 | FORD | Used | 180 | 16.38% |
| 1 | MERCEDES-BENZ | Used | 118 | 10.74% |
| 2 | HOLDEN | Used | 103 | 9.37% |
| 3 | CHEVROLET | Used | 76 | 6.92% |
| 4 | BMW | Used | 46 | 4.19% |
| 5 | JAGUAR | Used | 39 | 3.55% |
| 6 | HSV | Used | 30 | 2.73% |
| 7 | PORSCHE | Used | 30 | 2.73% |
| 8 | VOLKSWAGEN | Used | 30 | 2.73% |
| 9 | AUDI | Used | 27 | 2.46% |
fig = px.bar(insight13,x="Make", y="count", color="Listing Type",barmode='group',text = "Percentage",
title = "Top 10 Car Makes according to Listing Type")
fig.show()
df['COUNTER'] = 1
insight14 = df.groupby(['Make','Body Type'])['COUNTER'].agg(['count'])
insight14['Percentage'] = ((insight14['count'] / 1099)*100).round(2)
insight14['Percentage'] = insight14['Percentage'].astype(str) + '%'
insight14 = insight14.reset_index(['Make','Body Type'])
insight14 = insight14.nlargest(10,'count')
insight14.reset_index(inplace=True,drop=True)
insight14.style.background_gradient(cmap='Reds')
| Make | Body Type | count | Percentage | |
|---|---|---|---|---|
| 0 | HOLDEN | Sedan | 53 | 4.82% |
| 1 | FORD | Sedan | 44 | 4.0% |
| 2 | MERCEDES-BENZ | Sedan | 44 | 4.0% |
| 3 | FORD | Hardtop - Coupe | 41 | 3.73% |
| 4 | FORD | Ute | 37 | 3.37% |
| 5 | CHEVROLET | Hardtop - Coupe | 35 | 3.18% |
| 6 | MERCEDES-BENZ | Hardtop - Coupe | 27 | 2.46% |
| 7 | HSV | Sedan | 24 | 2.18% |
| 8 | JAGUAR | Sedan | 20 | 1.82% |
| 9 | HOLDEN | Hardtop - Coupe | 16 | 1.46% |
fig = px.bar(insight14,x="Make", y="count", color="Body Type",barmode='group',text = "Percentage",
title = "Top 10 Car Makes according to Body Type")
fig.show()
df['COUNTER'] = 1
insight15 = df.groupby(['Make','Fuel Type'])['COUNTER'].agg(['count'])
insight15['Percentage'] = ((insight15['count'] / 1099)*100).round(2)
insight15['Percentage'] = insight15['Percentage'].astype(str) + '%'
insight15 = insight15.reset_index(['Make','Fuel Type'])
insight15['Fuel Type'] = insight15['Fuel Type'].replace(r'nan', "Not available", regex=True)
insight15 = insight15.nlargest(10,'count')
insight15.reset_index(inplace=True,drop=True)
insight15.style.background_gradient(cmap='Reds')
| Make | Fuel Type | count | Percentage | |
|---|---|---|---|---|
| 0 | FORD | Unleaded | 61 | 5.55% |
| 1 | FORD | Not available | 59 | 5.37% |
| 2 | HOLDEN | Not available | 38 | 3.46% |
| 3 | MERCEDES-BENZ | Not available | 38 | 3.46% |
| 4 | MERCEDES-BENZ | Premium Unleaded | 35 | 3.18% |
| 5 | CHEVROLET | Unleaded | 33 | 3.0% |
| 6 | FORD | Premium Unleaded | 33 | 3.0% |
| 7 | HOLDEN | Premium Unleaded | 31 | 2.82% |
| 8 | BMW | Not available | 24 | 2.18% |
| 9 | MERCEDES-BENZ | Unleaded | 21 | 1.91% |
fig = px.bar(insight15,x="Make", y="count", color="Fuel Type",barmode='group',text = "Percentage",
title = "Top 10 Car Makes according to Fuel Type")
fig.show()
df['COUNTER'] = 1
insight16 = df.groupby(['Make','Engine Capacity - Cc'])['COUNTER'].agg(['count'])
insight16['Percentage'] = ((insight16['count'] / 1099)*100).round(2)
insight16['Percentage'] = insight16['Percentage'].astype(str) + '%'
insight16 = insight16.reset_index(['Make','Engine Capacity - Cc'])
insight16 = insight16.nlargest(10,'count')
insight16.reset_index(inplace=True,drop=True)
insight16.style.background_gradient(cmap='Reds')
| Make | Engine Capacity - Cc | count | Percentage | |
|---|---|---|---|---|
| 0 | FORD | 1989 | 107 | 9.74% |
| 1 | HOLDEN | 1989 | 70 | 6.37% |
| 2 | MERCEDES-BENZ | 1989 | 54 | 4.91% |
| 3 | CHEVROLET | 1989 | 48 | 4.37% |
| 4 | BMW | 1989 | 25 | 2.27% |
| 5 | HSV | 1989 | 23 | 2.09% |
| 6 | CHRYSLER | 1989 | 16 | 1.46% |
| 7 | JAGUAR | 1989 | 16 | 1.46% |
| 8 | PONTIAC | 1989 | 16 | 1.46% |
| 9 | AUDI | 1989 | 12 | 1.09% |
fig = px.bar(insight16,x="Make", y="count", color="Engine Capacity - Cc",barmode='group',text = "Percentage",
title = "Top 10 Car Makes according to Engine Capacity - Cc")
fig.show()
df['COUNTER'] = 1
insight17 = df.groupby(['Make','Engine Power - KW'])['COUNTER'].agg(['count'])
insight17['Percentage'] = ((insight17['count'] / 1099)*100).round(2)
insight17['Percentage'] = insight17['Percentage'].astype(str) + '%'
insight17 = insight17.reset_index(['Make','Engine Power - KW'])
insight17['Engine Power - KW'] = insight17['Engine Power - KW'].replace(r'nan', "Not available", regex=True)
insight17 = insight17.nlargest(10,'count')
insight17.reset_index(inplace=True,drop=True)
insight17.style.background_gradient(cmap='Reds')
| Make | Engine Power - KW | count | Percentage | |
|---|---|---|---|---|
| 0 | FORD | Not available | 145 | 13.19% |
| 1 | MERCEDES-BENZ | Not available | 112 | 10.19% |
| 2 | HOLDEN | Not available | 73 | 6.64% |
| 3 | CHEVROLET | Not available | 65 | 5.91% |
| 4 | BMW | Not available | 49 | 4.46% |
| 5 | JAGUAR | Not available | 37 | 3.37% |
| 6 | VOLKSWAGEN | Not available | 28 | 2.55% |
| 7 | HSV | Not available | 27 | 2.46% |
| 8 | PORSCHE | Not available | 27 | 2.46% |
| 9 | AUDI | Not available | 23 | 2.09% |
fig = px.bar(insight17,x="Make", y="count",barmode='group',text = "Percentage",
title = "Top 10 Car Makes according to Engine Power - KW")
fig.show()
df['COUNTER'] = 1
insight18 = df.groupby(['Make','Price'])['COUNTER'].agg(['count'])
insight18['Percentage'] = ((insight18['count'] / 1099)*100).round(2)
insight18['Percentage'] = insight18['Percentage'].astype(str) + '%'
insight18 = insight18.reset_index(['Make','Price'])
insight18 = insight18.nlargest(10,'count')
insight18.reset_index(inplace=True,drop=True)
insight18.style.background_gradient(cmap='Reds')
| Make | Price | count | Percentage | |
|---|---|---|---|---|
| 0 | FORD | 7922666 | 13 | 1.18% |
| 1 | HOLDEN | 7922666 | 12 | 1.09% |
| 2 | MERCEDES-BENZ | 49990 | 8 | 0.73% |
| 3 | MERCEDES-BENZ | 29990 | 7 | 0.64% |
| 4 | MERCEDES-BENZ | 39990 | 7 | 0.64% |
| 5 | AUDI | 39990 | 6 | 0.55% |
| 6 | CHEVROLET | 7922666 | 6 | 0.55% |
| 7 | FORD | 95000 | 6 | 0.55% |
| 8 | MERCEDES-BENZ | 26990 | 6 | 0.55% |
| 9 | FORD | 59990 | 5 | 0.45% |
fig = px.bar(insight18,x="Make", y="count", color="Price",barmode='group',text = "Percentage",
title = "Top 10 Car Makes according to Price")
fig.show()
df['COUNTER'] = 1
insight19 = df.groupby(['Model','Transmission'])['COUNTER'].agg(['count'])
insight19['Percentage'] = ((insight19['count'] / 1099)*100).round(2)
insight19['Percentage'] = insight19['Percentage'].astype(str) + '%'
insight19 = insight19.reset_index(['Model','Transmission'])
insight19 = insight19.nlargest(10,'count')
insight19.reset_index(inplace=True,drop=True)
insight19.style.background_gradient(cmap='Reds')
| Model | Transmission | count | Percentage | |
|---|---|---|---|---|
| 0 | MUSTANG | Manual | 8 | 0.73% |
| 1 | RANGE ROVER | Automatic | 7 | 0.64% |
| 2 | CONTINENTAL | Automatic | 6 | 0.55% |
| 3 | CORVETTE | Manual | 6 | 0.55% |
| 4 | MUSTANG | Automatic | 6 | 0.55% |
| 5 | X5 | Automatic | 6 | 0.55% |
| 6 | A8 | Tiptronic | 5 | 0.45% |
| 7 | CAYENNE | Tiptronic | 5 | 0.45% |
| 8 | CORVETTE | Automatic | 5 | 0.45% |
| 9 | ROADSTER | Automatic | 5 | 0.45% |
fig = px.bar(insight19,x="Model", y="count", color="Transmission",barmode='group',text = "Percentage",
title = "Top 10 Car Models according to Transmission")
fig.show()
df['COUNTER'] = 1
insight20 = df.groupby(['Model','Listing Type'])['COUNTER'].agg(['count'])
insight20['Percentage'] = ((insight20['count'] / 1099)*100).round(2)
insight20['Percentage'] = insight20['Percentage'].astype(str) + '%'
insight20 = insight20.reset_index(['Model','Listing Type'])
insight20 = insight20.nlargest(10,'count')
insight20.reset_index(inplace=True,drop=True)
insight20.style.background_gradient(cmap='Reds')
| Model | Listing Type | count | Percentage | |
|---|---|---|---|---|
| 0 | MUSTANG | Used | 17 | 1.55% |
| 1 | CORVETTE | Used | 14 | 1.27% |
| 2 | CAYENNE | Used | 7 | 0.64% |
| 3 | RANGE ROVER | Used | 7 | 0.64% |
| 4 | BEL AIR | Used | 6 | 0.55% |
| 5 | CONTINENTAL | Used | 6 | 0.55% |
| 6 | ROADSTER | Used | 6 | 0.55% |
| 7 | X5 | Used | 6 | 0.55% |
| 8 | A8 | Used | 5 | 0.45% |
| 9 | BONNEVILLE | Used | 5 | 0.45% |
fig = px.bar(insight20,x="Model", y="count", color="Listing Type",barmode='group',text = "Percentage",
title = "Top 10 Car Models according to Listing Type")
fig.show()
df['COUNTER'] = 1
insight21 = df.groupby(['Model','Body Type'])['COUNTER'].agg(['count'])
insight21['Percentage'] = ((insight21['count'] / 1099)*100).round(2)
insight21['Percentage'] = insight21['Percentage'].astype(str) + '%'
insight21 = insight21.reset_index(['Model','Body Type'])
insight21 = insight21.nlargest(10,'count')
insight21.reset_index(inplace=True,drop=True)
insight21.style.background_gradient(cmap='Reds')
| Model | Body Type | count | Percentage | |
|---|---|---|---|---|
| 0 | MUSTANG | Hardtop - Coupe | 12 | 1.09% |
| 1 | CAYENNE | Wagon | 7 | 0.64% |
| 2 | RANGE ROVER | Wagon | 7 | 0.64% |
| 3 | BEL AIR | Hardtop - Coupe | 6 | 0.55% |
| 4 | CORVETTE | Hardtop - Coupe | 6 | 0.55% |
| 5 | ROADSTER | Roadster | 6 | 0.55% |
| 6 | X5 | Wagon | 6 | 0.55% |
| 7 | A8 | Sedan | 5 | 0.45% |
| 8 | 3000 MK III BJ8 | Convertible | 4 | 0.36% |
| 9 | CORVETTE | Roadster | 4 | 0.36% |
fig = px.bar(insight21,x="Model", y="count", color="Body Type",barmode='group',text = "Percentage",
title = "Top 10 Car Models according to Body Type")
fig.show()
df['COUNTER'] = 1
insight22 = df.groupby(['Model','Fuel Type'])['COUNTER'].agg(['count'])
insight22['Percentage'] = ((insight22['count'] / 1099)*100).round(2)
insight22['Percentage'] = insight22['Percentage'].astype(str) + '%'
insight22 = insight22.reset_index(['Model','Fuel Type'])
insight22['Fuel Type'] = insight22['Fuel Type'].replace(r'nan', "Not available", regex=True)
insight22 = insight22.nlargest(10,'count')
insight22.reset_index(inplace=True,drop=True)
insight22.style.background_gradient(cmap='Reds')
| Model | Fuel Type | count | Percentage | |
|---|---|---|---|---|
| 0 | MUSTANG | Not available | 8 | 0.73% |
| 1 | RANGE ROVER | Diesel | 7 | 0.64% |
| 2 | CONTINENTAL | Premium Unleaded | 5 | 0.45% |
| 3 | MUSTANG | Unleaded | 5 | 0.45% |
| 4 | X5 | Diesel | 5 | 0.45% |
| 5 | 3000 MK III BJ8 | Leaded Petrol | 4 | 0.36% |
| 6 | 450SL | Unleaded | 4 | 0.36% |
| 7 | 560 | Unleaded | 4 | 0.36% |
| 8 | C200 | Premium Unleaded | 4 | 0.36% |
| 9 | C250 | Premium Unleaded | 4 | 0.36% |
fig = px.bar(insight22,x="Model", y="count", color="Fuel Type",barmode='group',text = "Percentage",
title = "Top 10 Car Models according to Fuel Type")
fig.show()
df['COUNTER'] = 1
insight23 = df.groupby(['Model','Engine Capacity - Cc'])['COUNTER'].agg(['count'])
insight23['Percentage'] = ((insight23['count'] / 1099)*100).round(2)
insight23['Percentage'] = insight23['Percentage'].astype(str) + '%'
insight23 = insight23.reset_index(['Model','Engine Capacity - Cc'])
insight23 = insight23.nlargest(10,'count')
insight23.reset_index(inplace=True,drop=True)
insight23.style.background_gradient(cmap='Reds')
| Model | Engine Capacity - Cc | count | Percentage | |
|---|---|---|---|---|
| 0 | CORVETTE | 1989 | 9 | 0.82% |
| 1 | MUSTANG | 1989 | 9 | 0.82% |
| 2 | BONNEVILLE | 1989 | 5 | 0.45% |
| 3 | MUSTANG | 0 | 5 | 0.45% |
| 4 | ROADSTER | 1989 | 5 | 0.45% |
| 5 | 3000 MK III BJ8 | 2912 | 4 | 0.36% |
| 6 | 450SL | 1989 | 4 | 0.36% |
| 7 | 560 | 5547 | 4 | 0.36% |
| 8 | 98 | 1989 | 4 | 0.36% |
| 9 | BEL AIR | 1989 | 4 | 0.36% |
fig = px.bar(insight23,x="Model", y="count", color="Engine Capacity - Cc",barmode='group',text = "Percentage",
title = "Top 10 Car Models according to Engine Capacity - Cc")
fig.show()
df['COUNTER'] = 1
insight24 = df.groupby(['Model','Engine Power - KW'])['COUNTER'].agg(['count'])
insight24['Percentage'] = ((insight24['count'] / 1099)*100).round(2)
insight24['Percentage'] = insight24['Percentage'].astype(str) + '%'
insight24 = insight24.reset_index(['Model','Engine Power - KW'])
insight24['Engine Power - KW'] = insight24['Engine Power - KW'].replace(r'nan', "Not available", regex=True)
insight24 = insight24.nlargest(10,'count')
insight24.reset_index(inplace=True,drop=True)
insight24.style.background_gradient(cmap='Reds')
| Model | Engine Power - KW | count | Percentage | |
|---|---|---|---|---|
| 0 | MUSTANG | Not available | 15 | 1.36% |
| 1 | CORVETTE | Not available | 12 | 1.09% |
| 2 | CAYENNE | Not available | 7 | 0.64% |
| 3 | RANGE ROVER | Not available | 7 | 0.64% |
| 4 | CONTINENTAL | Not available | 6 | 0.55% |
| 5 | ROADSTER | Not available | 6 | 0.55% |
| 6 | X5 | Not available | 6 | 0.55% |
| 7 | A8 | Not available | 5 | 0.45% |
| 8 | BONNEVILLE | Not available | 5 | 0.45% |
| 9 | 3000 MK III BJ8 | Not available | 4 | 0.36% |
fig = px.bar(insight24,x="Model", y="count", barmode='group',text = "Percentage",
title = "Top 10 Car Models according to Engine Power - KW")
fig.show()
df['COUNTER'] = 1
insight25 = df.groupby(['Model','Year'])['COUNTER'].agg(['count'])
insight25['Percentage'] = ((insight25['count'] / 1099)*100).round(2)
insight25['Percentage'] = insight25['Percentage'].astype(str) + '%'
insight25 = insight25.reset_index(['Model','Year'])
insight25 = insight25.nlargest(10,'count')
insight25.reset_index(inplace=True,drop=True)
insight25.style.background_gradient(cmap='Reds')
| Model | Year | count | Percentage | |
|---|---|---|---|---|
| 0 | BEL AIR | 1955 | 4 | 0.36% |
| 1 | FAIRLANE ZD | 1971 | 4 | 0.36% |
| 2 | ROADSTER | 1989 | 4 | 0.36% |
| 3 | X5 | 2014 | 4 | 0.36% |
| 4 | COUPE Master 85 | 1939 | 3 | 0.27% |
| 5 | 308 GTB Vetroresina | 1976 | 3 | 0.27% |
| 6 | BONNEVILLE | 1963 | 3 | 0.27% |
| 7 | C-CLASS W204 MY13 | 2012 | 3 | 0.27% |
| 8 | CAMARO | 1968 | 3 | 0.27% |
| 9 | CHALLENGER | 1970 | 3 | 0.27% |
fig = px.bar(insight25,x="Model", y="count", color="Year",barmode='group',text = "Percentage",
title = "Top 10 Car Models according to Year")
fig.show()
df['COUNTER'] = 1
insight26 = df.groupby(['Model','Price'])['COUNTER'].agg(['count'])
insight26['Percentage'] = ((insight26['count'] / 1099)*100).round(2)
insight26['Percentage'] = insight26['Percentage'].astype(str) + '%'
insight26 = insight26.reset_index(['Model','Price'])
insight26 = insight26.nlargest(10,'count')
insight26.reset_index(inplace=True,drop=True)
insight26.style.background_gradient(cmap='Reds')
| Model | Price | count | Percentage | |
|---|---|---|---|---|
| 0 | A8 | 39990 | 4 | 0.36% |
| 1 | FAIRLANE ZD | 82500 | 4 | 0.36% |
| 2 | ROADSTER | 40000 | 4 | 0.36% |
| 3 | COUPE Master 85 | 28000 | 3 | 0.27% |
| 4 | C-CLASS W204 MY13 | 26990 | 3 | 0.27% |
| 5 | HX GTS Clone | 50000 | 3 | 0.27% |
| 6 | MUSTANG | 45000 | 3 | 0.27% |
| 7 | MUSTANG | 129990 | 3 | 0.27% |
| 8 | TORANA LX | 7922666 | 3 | 0.27% |
| 9 | X7 | 65000 | 2 | 0.18% |
fig = px.bar(insight26,x="Model", y="count", color="Price",barmode='group',text = "Percentage",
title = "Top 10 Car Models according to Price")
fig.show()
df['COUNTER'] = 1
insight27 = df.groupby(['Body Type','Price'])['COUNTER'].agg(['count'])
insight27['Percentage'] = ((insight27['count'] / 1099)*100).round(2)
insight27['Percentage'] = insight27['Percentage'].astype(str) + '%'
insight27 = insight27.reset_index(['Body Type','Price'])
insight27 = insight27.nlargest(10,'count')
insight27.reset_index(inplace=True,drop=True)
insight27.style.background_gradient(cmap='Reds')
| Body Type | Price | count | Percentage | |
|---|---|---|---|---|
| 0 | Hardtop - Coupe | 7922666 | 24 | 2.18% |
| 1 | nan | 7922666 | 15 | 1.36% |
| 2 | Sedan | 39990 | 14 | 1.27% |
| 3 | Sedan | 7922666 | 14 | 1.27% |
| 4 | Sedan | 25000 | 9 | 0.82% |
| 5 | Hardtop - Coupe | 129990 | 8 | 0.73% |
| 6 | Sedan | 49990 | 8 | 0.73% |
| 7 | Convertible | 7922666 | 6 | 0.55% |
| 8 | Hardtop - Coupe | 95000 | 6 | 0.55% |
| 9 | Sedan | 20000 | 6 | 0.55% |
fig = px.bar(insight27,x="Body Type", y="count", color="Price",barmode='group',text = "Percentage",
title = "Top 10 Car Body Type according to Price")
fig.show()
df['COUNTER'] = 1
insight28 = df.groupby(['Fuel Type','Price'])['COUNTER'].agg(['count'])
insight28['Percentage'] = ((insight28['count'] / 1099)*100).round(2)
insight28['Percentage'] = insight28['Percentage'].astype(str) + '%'
insight28 = insight28.reset_index(['Fuel Type','Price'])
insight28['Fuel Type'] = insight28['Fuel Type'].replace(r'nan', "Not available", regex=True)
insight28 = insight28.nlargest(10,'count')
insight28.reset_index(inplace=True,drop=True)
insight28.style.background_gradient(cmap='Reds')
| Fuel Type | Price | count | Percentage | |
|---|---|---|---|---|
| 0 | Not available | 7922666 | 70 | 6.37% |
| 1 | Not available | 14990 | 10 | 0.91% |
| 2 | Not available | 49990 | 10 | 0.91% |
| 3 | Diesel | 29990 | 8 | 0.73% |
| 4 | Premium Unleaded | 25000 | 8 | 0.73% |
| 5 | Premium Unleaded | 95000 | 8 | 0.73% |
| 6 | Unleaded | 59990 | 8 | 0.73% |
| 7 | Not available | 39990 | 8 | 0.73% |
| 8 | Diesel | 39990 | 7 | 0.64% |
| 9 | Unleaded | 35000 | 7 | 0.64% |
fig = px.bar(insight28,x="Fuel Type", y="count", color="Price",barmode='group',text = "Percentage",
title = "Top 10 Car Fuel Type according to Price")
fig.show()
df['COUNTER'] = 1
insight29 = df.groupby(['Listing Type','Price'])['COUNTER'].agg(['count'])
insight29['Percentage'] = ((insight29['count'] / 1099)*100).round(2)
insight29['Percentage'] = insight29['Percentage'].astype(str) + '%'
insight29 = insight29.reset_index(['Listing Type','Price'])
insight29 = insight29.nlargest(5,'count')
insight29.reset_index(inplace=True,drop=True)
insight29.style.background_gradient(cmap='Reds')
| Listing Type | Price | count | Percentage | |
|---|---|---|---|---|
| 0 | Used | 7922666 | 59 | 5.37% |
| 1 | Used | 39990 | 25 | 2.27% |
| 2 | Used | 59990 | 22 | 2.0% |
| 3 | Used | 49990 | 20 | 1.82% |
| 4 | Used | 25000 | 17 | 1.55% |
fig = px.bar(insight29,x="Listing Type", y="count", color="Price",barmode='group',text = "Percentage",
title = "Top 5 Car Listing Type according to Price")
fig.show()
df['COUNTER'] = 1
insight30 = df.groupby(['Transmission','Price'])['COUNTER'].agg(['count'])
insight30['Percentage'] = ((insight30['count'] / 1099)*100).round(2)
insight30['Percentage'] = insight30['Percentage'].astype(str) + '%'
insight30 = insight30.reset_index(['Transmission','Price'])
insight30['Transmission'] = insight30['Transmission'].replace(r'nan', "Not available", regex=True)
insight30 = insight30.nlargest(10,'count')
insight30.reset_index(inplace=True,drop=True)
insight30.style.background_gradient(cmap='Reds')
| Transmission | Price | count | Percentage | |
|---|---|---|---|---|
| 0 | Not available | 7922666 | 31 | 2.82% |
| 1 | Automatic | 7922666 | 25 | 2.27% |
| 2 | Manual | 7922666 | 18 | 1.64% |
| 3 | Automatic | 59990 | 10 | 0.91% |
| 4 | Not available | 14990 | 10 | 0.91% |
| 5 | Not available | 49990 | 10 | 0.91% |
| 6 | Automatic | 29990 | 9 | 0.82% |
| 7 | Automatic | 39990 | 9 | 0.82% |
| 8 | Automatic | 49990 | 9 | 0.82% |
| 9 | Automatic | 69990 | 8 | 0.73% |
fig = px.bar(insight30,x="Transmission", y="count", color="Price",barmode='group',text = "Percentage",
title = "Top 10 Transmission according to Price")
fig.show()
df['COUNTER'] = 1
insight31 = df.groupby(['Engine Capacity - Cc','Price'])['COUNTER'].agg(['count'])
insight31['Percentage'] = ((insight31['count'] / 1099)*100).round(2)
insight31['Percentage'] = insight31['Percentage'].astype(str) + '%'
insight31 = insight31.reset_index(['Engine Capacity - Cc','Price'])
insight31 = insight31.nlargest(10,'count')
insight31.reset_index(inplace=True,drop=True)
insight31.style.background_gradient(cmap='Reds')
| Engine Capacity - Cc | Price | count | Percentage | |
|---|---|---|---|---|
| 0 | 1989 | 7922666 | 70 | 6.37% |
| 1 | 1989 | 39990 | 12 | 1.09% |
| 2 | 1989 | 59990 | 12 | 1.09% |
| 3 | 1989 | 49990 | 11 | 1.0% |
| 4 | 1989 | 14990 | 10 | 0.91% |
| 5 | 1989 | 40000 | 9 | 0.82% |
| 6 | 1989 | 45000 | 9 | 0.82% |
| 7 | 1989 | 25000 | 8 | 0.73% |
| 8 | 1989 | 19990 | 7 | 0.64% |
| 9 | 1989 | 26990 | 7 | 0.64% |
fig = px.bar(insight31,x="Engine Capacity - Cc", y="count", color="Price",barmode='group',text = "Percentage",
title = "Top 10 Car Engine Capacity - Cc according to Price ")
fig.show()
df['COUNTER'] = 1
insight32 = df.groupby(['Transmission','Body Type'])['COUNTER'].agg(['count'])
insight32['Percentage'] = ((insight32['count'] / 1099)*100).round(2)
insight32['Percentage'] = insight32['Percentage'].astype(str) + '%'
insight32 = insight32.reset_index(['Transmission','Body Type'])
insight32['Transmission'] = insight32['Transmission'].replace(r'nan', "Not available", regex=True)
insight32['Body Type'] = insight32['Body Type'].replace(r'nan', "Not available", regex=True)
insight32 = insight32.nlargest(10,'count')
insight32.reset_index(inplace=True,drop=True)
insight32.style.background_gradient(cmap='Reds')
| Transmission | Body Type | count | Percentage | |
|---|---|---|---|---|
| 0 | Automatic | Sedan | 128 | 11.65% |
| 1 | Automatic | Hardtop - Coupe | 114 | 10.37% |
| 2 | Not available | Sedan | 90 | 8.19% |
| 3 | Automatic | Wagon | 75 | 6.82% |
| 4 | Manual | Hardtop - Coupe | 68 | 6.19% |
| 5 | Not available | Hardtop - Coupe | 61 | 5.55% |
| 6 | Manual | Sedan | 56 | 5.1% |
| 7 | Not available | Not available | 54 | 4.91% |
| 8 | Automatic | Ute | 50 | 4.55% |
| 9 | Manual | Convertible | 40 | 3.64% |
fig = px.bar(insight32,x="Transmission", y="count", color="Body Type",barmode='group',text = "Percentage",
title = "Top 10 Transmission according to Body Type")
fig.show()
df['COUNTER'] = 1
insight33 = df.groupby(['Fuel Type','Body Type'])['COUNTER'].agg(['count'])
insight33['Percentage'] = ((insight33['count'] / 1099)*100).round(2)
insight33['Percentage'] = insight33['Percentage'].astype(str) + '%'
insight33 = insight33.reset_index(['Fuel Type','Body Type'])
insight33['Fuel Type'] = insight33['Fuel Type'].replace(r'nan', "Not available", regex=True)
insight33['Body Type'] = insight33['Body Type'].replace(r'nan', "Not available", regex=True)
insight33 = insight33.nlargest(10,'count')
insight33.reset_index(inplace=True,drop=True)
insight33.style.background_gradient(cmap='Reds')
| Fuel Type | Body Type | count | Percentage | |
|---|---|---|---|---|
| 0 | Not available | Hardtop - Coupe | 112 | 10.19% |
| 1 | Not available | Sedan | 112 | 10.19% |
| 2 | Premium Unleaded | Hardtop - Coupe | 70 | 6.37% |
| 3 | Premium Unleaded | Sedan | 68 | 6.19% |
| 4 | Unleaded | Sedan | 62 | 5.64% |
| 5 | Not available | Not available | 54 | 4.91% |
| 6 | Unleaded | Hardtop - Coupe | 46 | 4.19% |
| 7 | Unleaded | Convertible | 43 | 3.91% |
| 8 | Diesel | Wagon | 41 | 3.73% |
| 9 | Not available | Convertible | 35 | 3.18% |
fig = px.bar(insight33,x="Fuel Type", y="count", color="Body Type",barmode='group',text = "Percentage",
title = "Top 10 Fuel Type according to Body Type")
fig.show()
insight34 = df.groupby(['Make', 'Model', 'Year']).size().unstack(level=2, fill_value=0)
insight34 = (insight34.stack().reset_index(name='Total Count'))
insight34['Percentage'] = ((insight34['Total Count'] / 1099)*100).round(2)
insight34['Percentage'] = insight34['Percentage'].astype(str) + '%'
insight_34 = insight34.nlargest(20,'Total Count')
pd.crosstab([insight34['Make'], insight34['Total Count']],[insight34['Year']],margins=True)
| Year | 1920 | 1922 | 1923 | 1926 | 1929 | 1931 | 1932 | 1934 | 1935 | 1936 | ... | 2013 | 2014 | 2015 | 2016 | 2017 | 2018 | 2019 | 2020 | 2021 | All | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Make | Total Count | |||||||||||||||||||||
| AC | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | ... | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 85 |
| 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | |
| ALFA ROMEO | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | ... | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 86 |
| 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | |
| ASTON MARTIN | 0 | 6 | 6 | 6 | 6 | 6 | 6 | 6 | 6 | 6 | 6 | ... | 5 | 6 | 6 | 6 | 6 | 6 | 6 | 6 | 5 | 516 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| WESTFIELD | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | ... | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 86 |
| 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | |
| nan | 0 | 3 | 3 | 3 | 3 | 3 | 3 | 3 | 3 | 3 | 3 | ... | 3 | 3 | 3 | 3 | 3 | 3 | 3 | 3 | 3 | 258 |
| 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | |
| All | 716 | 716 | 716 | 716 | 716 | 716 | 716 | 716 | 716 | 716 | ... | 716 | 716 | 716 | 716 | 716 | 716 | 716 | 716 | 716 | 62292 |
249 rows × 88 columns
fig = px.bar(insight_34,x="Make", y="Total Count", color="Year",barmode='group',text="Percentage",
title = "Top 20 Car Make according to Year")
fig.show()
pd.crosstab([insight34['Model'], insight34['Total Count']],[insight34['Year']],margins=True)
| Year | 1920 | 1922 | 1923 | 1926 | 1929 | 1931 | 1932 | 1934 | 1935 | 1936 | ... | 2013 | 2014 | 2015 | 2016 | 2017 | 2018 | 2019 | 2020 | 2021 | All | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Model | Total Count | |||||||||||||||||||||
| COUPE Master 85 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | ... | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 86 |
| 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | |
| X7 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | ... | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 86 |
| 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | |
| 1 SERIES E82 LCI MY13 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | ... | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 86 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| Cooper BRM F2 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | ... | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 86 |
| 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | |
| Jaguar E-Type 4.2 F H Coupe | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | ... | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 86 |
| 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | |
| All | 716 | 716 | 716 | 716 | 716 | 716 | 716 | 716 | 716 | 716 | ... | 716 | 716 | 716 | 716 | 716 | 716 | 716 | 716 | 716 | 62292 |
1454 rows × 88 columns
fig = px.bar(insight_34,x="Model", y="Total Count", color="Year",barmode='group',text="Percentage",
title = "Top 20 Car Model according to Year")
fig.show()
insight35 = df.groupby(['Make', 'Year', 'Price']).size().unstack(level=2, fill_value=0)
insight35 = (insight35.stack().reset_index(name='Total Count'))
insight35['Percentage'] = ((insight35['Total Count'] / 1099)*100).round(2)
insight35['Percentage'] = insight35['Percentage'].astype(str) + '%'
insight_35 = insight35.nlargest(20,'Total Count')
pd.crosstab([insight35['Make'], insight35['Total Count']],[insight35['Year']],margins=True)
| Year | 1920 | 1922 | 1923 | 1926 | 1929 | 1931 | 1932 | 1934 | 1935 | 1936 | ... | 2013 | 2014 | 2015 | 2016 | 2017 | 2018 | 2019 | 2020 | 2021 | All | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Make | Total Count | |||||||||||||||||||||
| AC | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 734 |
| 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | |
| ALFA ROMEO | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 367 |
| 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | |
| ASTON MARTIN | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 366 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 367 | 2201 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| WESTFIELD | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 367 |
| 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | |
| nan | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1101 |
| 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | |
| All | 368 | 368 | 368 | 368 | 736 | 368 | 368 | 1104 | 368 | 736 | ... | 5152 | 5152 | 6624 | 4048 | 4784 | 4416 | 2576 | 2208 | 1104 | 190624 |
246 rows × 88 columns
fig = px.bar(insight_35,x="Make", y="Total Count", color="Year",barmode='group',text="Percentage",
title = "Top 20 Car Make according to Year")
fig.show()
pd.crosstab([insight35['Price'], insight35['Total Count']],[insight35['Year']],margins=True)
| Year | 1920 | 1922 | 1923 | 1926 | 1929 | 1931 | 1932 | 1934 | 1935 | 1936 | ... | 2013 | 2014 | 2015 | 2016 | 2017 | 2018 | 2019 | 2020 | 2021 | All | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Price | Total Count | |||||||||||||||||||||
| 0 | 0 | 1 | 1 | 1 | 1 | 2 | 1 | 1 | 3 | 1 | 2 | ... | 14 | 14 | 18 | 11 | 13 | 12 | 7 | 6 | 3 | 517 |
| 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | |
| 1950 | 0 | 1 | 1 | 1 | 1 | 2 | 1 | 1 | 3 | 1 | 2 | ... | 14 | 14 | 18 | 11 | 13 | 12 | 7 | 6 | 3 | 517 |
| 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | |
| 2500 | 0 | 1 | 1 | 1 | 1 | 2 | 1 | 1 | 3 | 1 | 2 | ... | 14 | 14 | 18 | 11 | 13 | 12 | 7 | 6 | 3 | 517 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 219999000 | 0 | 1 | 1 | 1 | 1 | 2 | 1 | 1 | 3 | 1 | 2 | ... | 14 | 14 | 18 | 11 | 13 | 12 | 7 | 6 | 3 | 517 |
| 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | |
| 2000020000 | 0 | 1 | 1 | 1 | 1 | 2 | 1 | 1 | 3 | 1 | 2 | ... | 14 | 14 | 18 | 11 | 13 | 12 | 7 | 6 | 3 | 517 |
| 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | |
| All | 368 | 368 | 368 | 368 | 736 | 368 | 368 | 1104 | 368 | 736 | ... | 5152 | 5152 | 6624 | 4048 | 4784 | 4416 | 2576 | 2208 | 1104 | 190624 |
832 rows × 88 columns
fig = px.bar(insight_35,x="Year", y="Total Count", color="Price",barmode='group',text="Percentage",
title = "Top 20 Car Price according to Year")
fig.show()
insight36 = df.groupby(['Make', 'Model', 'Body Type']).size().unstack(level=2, fill_value=0)
insight36 = (insight36.stack().reset_index(name='Total Count'))
insight36['Percentage'] = ((insight36['Total Count'] / 1099)*100).round(2)
insight36['Percentage'] = insight36['Percentage'].astype(str) + '%'
insight_36 = insight36.nlargest(10,'Total Count')
fig = px.bar(insight_36,x="Make", y="Total Count", color="Model",barmode='group',text="Percentage",
title = "Top 10 Car Make according to Model")
fig.show()
pd.crosstab([insight36['Make'], insight36['Total Count']],[insight36['Body Type']],margins=True)
| Body Type | Cab Chassis | Convertible | Fastback | Hardtop - Coupe | Hatchback | Limousine | Open Wheeler | Panel Van | Roadster | Sedan | Soft Top | Style Side | Targa | Tourer | Tray | Ute | Van | Wagon | nan | All | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Make | Total Count | ||||||||||||||||||||
| AC | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 17 |
| 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | |
| ALFA ROMEO | 0 | 1 | 1 | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 18 |
| 2 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | |
| ASTON MARTIN | 0 | 6 | 6 | 6 | 0 | 6 | 6 | 6 | 6 | 6 | 6 | 6 | 6 | 6 | 6 | 6 | 6 | 6 | 6 | 6 | 108 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| WESTFIELD | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 18 |
| 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | |
| nan | 0 | 3 | 3 | 3 | 3 | 3 | 3 | 3 | 3 | 3 | 3 | 3 | 3 | 3 | 3 | 3 | 3 | 3 | 3 | 0 | 54 |
| 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 3 | |
| All | 716 | 716 | 716 | 716 | 716 | 716 | 716 | 716 | 716 | 716 | 716 | 716 | 716 | 716 | 716 | 716 | 716 | 716 | 716 | 13604 |
261 rows × 20 columns
fig = px.bar(insight_36,x="Model", y="Total Count", color="Body Type",barmode='group',text="Percentage",
title = "Top 10 Car Makes according to Body Type")
fig.show()
insight37 = df.groupby(['Model', 'Body Type', 'Price']).size().unstack(level=2, fill_value=0)
insight37 = (insight37.stack().reset_index(name='Total Count'))
insight37['Percentage'] = ((insight37['Total Count'] / 1099)*100).round(2)
insight37['Percentage'] = insight37['Percentage'].astype(str) + '%'
insight_37 = insight37.nlargest(10,'Total Count')
pd.crosstab([insight37['Model'], insight37['Total Count']],[insight37['Body Type']],margins=True)
| Body Type | Cab Chassis | Convertible | Fastback | Hardtop - Coupe | Hatchback | Limousine | Open Wheeler | Panel Van | Roadster | Sedan | Soft Top | Style Side | Targa | Tourer | Tray | Ute | Van | Wagon | nan | All | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Model | Total Count | ||||||||||||||||||||
| COUPE Master 85 | 0 | 0 | 0 | 0 | 367 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 367 |
| 3 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | |
| X7 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 367 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 367 |
| 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | |
| 1 SERIES E82 LCI MY13 | 0 | 0 | 0 | 0 | 367 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 367 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| Cooper BRM F2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 367 | 367 |
| 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | |
| Jaguar E-Type 4.2 F H Coupe | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 367 | 367 |
| 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | |
| All | 2944 | 28336 | 5152 | 58144 | 8096 | 2208 | 1472 | 3680 | 10672 | 71392 | 1472 | 736 | 1472 | 1104 | 1104 | 23184 | 8464 | 26128 | 19504 | 275264 |
1462 rows × 20 columns
fig = px.bar(insight_37,x="Model", y="Total Count", color="Body Type",barmode='group',text="Percentage",
title = "Top 10 Car Model according to Body Type")
fig.show()
pd.crosstab([insight37['Model'], insight37['Total Count']],[insight37['Price']],margins=True)
| Price | 0 | 1950 | 2500 | 2800 | 3300 | 3990 | 4500 | 4950 | 5500 | 5990 | ... | 41999000 | 46599000 | 47999000 | 49999000 | 54999000 | 69999000 | 99099000 | 219999000 | 2000020000 | All | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Model | Total Count | |||||||||||||||||||||
| COUPE Master 85 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | ... | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 367 |
| 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | |
| X7 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | ... | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 367 |
| 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | |
| 1 SERIES E82 LCI MY13 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | ... | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 367 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| Cooper BRM F2 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | ... | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 367 |
| 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | |
| Jaguar E-Type 4.2 F H Coupe | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | ... | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 367 |
| 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | |
| All | 748 | 748 | 748 | 748 | 748 | 748 | 748 | 748 | 748 | 748 | ... | 748 | 748 | 748 | 748 | 748 | 748 | 748 | 748 | 748 | 275264 |
1462 rows × 369 columns
fig = px.bar(insight_37,x="Model", y="Total Count", color="Price",barmode='group',text="Percentage",
title = "Top 10 Car Model according to Price")
fig.show()
insight38 = df.groupby(['Body Type', 'Fuel Type', 'Price']).size().unstack(level=2, fill_value=0)
insight38 = (insight38.stack().reset_index(name='Total Count'))
insight38['Percentage'] = ((insight38['Total Count'] / 1099)*100).round(2)
insight38['Body Type'] = insight38['Body Type'].replace(r'nan', "Not available", regex=True)
insight38['Fuel Type'] = insight38['Fuel Type'].replace(r'nan', "Not available", regex=True)
insight38['Percentage'] = insight38['Percentage'].astype(str) + '%'
insight_38 = insight38.nlargest(10,'Total Count')
pd.crosstab([insight38['Body Type'], insight38['Total Count']],[insight38['Fuel Type']],margins=True)
| Fuel Type | Diesel | Dual Fuel | Ethanol | Hybrid | LPG | Leaded Petrol | Not available | Premium Unleaded | Unleaded | All | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| Body Type | Total Count | ||||||||||
| Cab Chassis | 0 | 363 | 0 | 0 | 0 | 0 | 0 | 367 | 367 | 367 | 1464 |
| 1 | 5 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 8 | |
| Convertible | 0 | 367 | 0 | 0 | 0 | 0 | 365 | 346 | 350 | 337 | 1765 |
| 1 | 1 | 0 | 0 | 0 | 0 | 0 | 13 | 10 | 20 | 44 | |
| 2 | 0 | 0 | 0 | 0 | 0 | 3 | 7 | 7 | 10 | 27 | |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| Wagon | 1 | 12 | 0 | 0 | 0 | 0 | 1 | 14 | 13 | 7 | 47 |
| 2 | 11 | 0 | 0 | 0 | 0 | 0 | 5 | 4 | 9 | 29 | |
| 3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 2 | |
| 4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | |
| All | 2944 | 1472 | 736 | 368 | 1472 | 4048 | 5520 | 5520 | 6256 | 28336 |
77 rows × 10 columns
fig = px.bar(insight_38,x="Body Type", y="Total Count", color="Fuel Type",barmode='group',text="Percentage",
title = "Top 10 Car Body Type according to Fuel Type")
fig.show()
pd.crosstab([insight38['Body Type'], insight38['Total Count']],[insight38['Price']],margins=True)
| Price | 0 | 1950 | 2500 | 2800 | 3300 | 3990 | 4500 | 4950 | 5500 | 5990 | ... | 41999000 | 46599000 | 47999000 | 49999000 | 54999000 | 69999000 | 99099000 | 219999000 | 2000020000 | All | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Body Type | Total Count | |||||||||||||||||||||
| Cab Chassis | 0 | 4 | 4 | 4 | 4 | 4 | 4 | 4 | 4 | 4 | 3 | ... | 4 | 4 | 4 | 4 | 4 | 4 | 4 | 4 | 4 | 1464 |
| 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | |
| Convertible | 0 | 5 | 5 | 5 | 5 | 5 | 5 | 5 | 5 | 5 | 5 | ... | 5 | 5 | 5 | 5 | 5 | 5 | 5 | 5 | 5 | 1765 |
| 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 44 | |
| 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 27 | |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| Wagon | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 47 |
| 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 29 | |
| 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | |
| 4 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | |
| All | 77 | 77 | 77 | 77 | 77 | 77 | 77 | 77 | 77 | 77 | ... | 77 | 77 | 77 | 77 | 77 | 77 | 77 | 77 | 77 | 28336 |
77 rows × 369 columns
fig = px.bar(insight_38,x="Body Type", y="Total Count", color="Price",barmode='group',text="Percentage",
title = "Top 10 Car Body Type according to Price")
fig.show()
insight39 = df.groupby(['Model', 'Fuel Type', 'Price']).size().unstack(level=2, fill_value=0)
insight39 = (insight39.stack().reset_index(name='Total Count'))
insight39['Percentage'] = ((insight39['Total Count'] / 1099)*100).round(2)
insight39['Fuel Type'] = insight39['Fuel Type'].replace(r'nan', "Not available", regex=True)
insight39['Percentage'] = insight39['Percentage'].astype(str) + '%'
insight_39 = insight39.nlargest(10,'Total Count')
pd.crosstab([insight39['Model'], insight39['Total Count']],[insight39['Fuel Type']],margins=True)
| Fuel Type | Diesel | Dual Fuel | Ethanol | Hybrid | LPG | Leaded Petrol | Not available | Premium Unleaded | Unleaded | All | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| Model | Total Count | ||||||||||
| COUPE Master 85 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 367 | 367 |
| 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | |
| X7 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 367 | 0 | 367 |
| 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | |
| 1 SERIES E82 LCI MY13 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 367 | 0 | 0 | 367 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| Cooper BRM F2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 367 | 0 | 0 | 367 |
| 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | |
| Jaguar E-Type 4.2 F H Coupe | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 367 | 0 | 0 | 367 |
| 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | |
| All | 24288 | 1840 | 736 | 368 | 1840 | 15824 | 108560 | 60720 | 63664 | 277840 |
1462 rows × 10 columns
fig = px.bar(insight_39,x="Model", y="Total Count", color="Fuel Type",barmode='group',text="Percentage",
title = "Top 10 Car Model according to Fuel Type")
fig.show()
pd.crosstab([insight39['Model'], insight39['Total Count']],[insight39['Price']],margins=True)
| Price | 0 | 1950 | 2500 | 2800 | 3300 | 3990 | 4500 | 4950 | 5500 | 5990 | ... | 41999000 | 46599000 | 47999000 | 49999000 | 54999000 | 69999000 | 99099000 | 219999000 | 2000020000 | All | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Model | Total Count | |||||||||||||||||||||
| COUPE Master 85 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | ... | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 367 |
| 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | |
| X7 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | ... | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 367 |
| 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | |
| 1 SERIES E82 LCI MY13 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | ... | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 367 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| Cooper BRM F2 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | ... | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 367 |
| 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | |
| Jaguar E-Type 4.2 F H Coupe | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | ... | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 367 |
| 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | |
| All | 755 | 755 | 755 | 755 | 755 | 755 | 755 | 755 | 755 | 755 | ... | 755 | 755 | 755 | 755 | 755 | 755 | 755 | 755 | 755 | 277840 |
1462 rows × 369 columns
fig = px.bar(insight_39,x="Model", y="Total Count", color="Price",barmode='group',text="Percentage",
title = "Top 10 Car Model according to Price")
fig.show()
insight40 = df.groupby(['Model', 'Listing Type', 'Price']).size().unstack(level=2, fill_value=0)
insight40 = (insight40.stack().reset_index(name='Total Count'))
insight40['Percentage'] = ((insight40['Total Count'] / 1099)*100).round(2)
insight40['Listing Type'] = insight40['Listing Type'].replace(r'nan', "Not available", regex=True)
insight40['Percentage'] = insight40['Percentage'].astype(str) + '%'
insight_40 = insight40.nlargest(10,'Total Count')
pd.crosstab([insight40['Model'], insight40['Total Count']],[insight40['Listing Type']],margins=True)
| Listing Type | New | Not available | Used | All | |
|---|---|---|---|---|---|
| Model | Total Count | ||||
| COUPE Master 85 | 0 | 0 | 0 | 367 | 367 |
| 3 | 0 | 0 | 1 | 1 | |
| X7 | 0 | 0 | 0 | 367 | 367 |
| 2 | 0 | 0 | 1 | 1 | |
| 1 SERIES E82 LCI MY13 | 0 | 0 | 0 | 367 | 367 |
| ... | ... | ... | ... | ... | ... |
| Cooper BRM F2 | 0 | 0 | 367 | 0 | 367 |
| 1 | 0 | 1 | 0 | 1 | |
| Jaguar E-Type 4.2 F H Coupe | 0 | 0 | 367 | 0 | 367 |
| 1 | 0 | 1 | 0 | 1 | |
| All | 1104 | 19504 | 241040 | 261648 |
1462 rows × 4 columns
fig = px.bar(insight_40,x="Model", y="Total Count", color="Listing Type",barmode='group',text="Percentage",
title = "Top 10 Car Model according to Year")
fig.show()
pd.crosstab([insight40['Model'], insight40['Total Count']],[insight40['Price']],margins=True)
| Price | 0 | 1950 | 2500 | 2800 | 3300 | 3990 | 4500 | 4950 | 5500 | 5990 | ... | 41999000 | 46599000 | 47999000 | 49999000 | 54999000 | 69999000 | 99099000 | 219999000 | 2000020000 | All | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Model | Total Count | |||||||||||||||||||||
| COUPE Master 85 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | ... | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 367 |
| 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | |
| X7 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | ... | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 367 |
| 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | |
| 1 SERIES E82 LCI MY13 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | ... | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 367 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| Cooper BRM F2 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | ... | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 367 |
| 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | |
| Jaguar E-Type 4.2 F H Coupe | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | ... | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 367 |
| 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | |
| All | 711 | 711 | 711 | 711 | 711 | 711 | 711 | 711 | 711 | 711 | ... | 711 | 711 | 711 | 711 | 711 | 711 | 711 | 711 | 711 | 261648 |
1462 rows × 369 columns
fig = px.bar(insight_40,x="Model", y="Total Count", color="Price",barmode='group',text="Percentage",
title = "Top 10 Car Model according to Price")
fig.show()